In [1]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
In [2]:
# Load the per-country table of Estonian citizens abroad.
# The first row of the CSV holds the column names.
df = pd.read_csv(
    filepath_or_buffer="Estonian citizens abroad.csv",
    header=0,
)

# Preview the first five rows as the cell output.
df.head(5)
Out[2]:
Nimi Name iso_alpha # of Estonian citizens Continent Sub-region Capital Latitude (capital) Longitude (capital) Distance between Capital and Tallinn (km) Population (2020) GDP PPP per capita Formerly part of the USSR? Sovereignty
0 Venezuela Venezuela VEN 237 Americas South America Caracas 10.500000 -66.933300 9098.0 28435943 NaN No Sovereign
1 Monaco Monaco MCO 28 Europe Western Europe Monaco 43.739600 7.406900 2103.1 39244 NaN No Sovereign
2 Taiwan (Chn) Taiwan TWN 12 Asia Eastern Asia Taipei 25.047800 121.531900 7996.5 23816775 NaN No Sovereign
3 Jersey (Gbr) Jersey JEY 5 Europe Northern Europe Saint Helier 49.185800 -2.110000 2059.7 107800 NaN No Not sovereign
4 Ühendriikide Hajasaared (Usa) United States Minor Outlying Islands UMI 5 Oceania Micronesia Wake Atoll 19.308552 166.631012 10598.8 300 NaN No Not sovereign
In [3]:
# Column labels, defined once so the plotting cells can refer to the
# columns by name instead of repeating the literal strings.
target_label = "# of Estonian citizens"
name_label = "Name"
name_target_label = "Name and number of citizens"

# Handles on the two source columns.
target = df[target_label]
name = df[name_label]

# Combined "<name>\n<count>" string per row, stored as a new column on df.
df[name_target_label] = name.map(str) + '\n' + target.map(str)
name_target = df[name_target_label]
In [4]:
# Min-max scale the capital-to-Tallinn distance: the smallest distance in
# the table maps to 0 and the largest to 1.
distance = df["Distance between Capital and Tallinn (km)"]

distance_norm = distance.sub(distance.min()).div(distance.max() - distance.min())

# Keep the scaled values on the frame for the plotting cells.
df["Distance normalized"] = distance_norm
In [5]:
# World map with one marker per row of df, placed at the capital's
# coordinates. Marker size and colour both encode the number of Estonian
# citizens.

# The label column joins name and count with "\n", but Plotly renders a raw
# newline in hover text as a plain space. "<br>" is the line-break markup
# Plotly understands, so convert it here (the DataFrame column is left as-is).
hover_text = df[name_target_label].str.replace("\n", "<br>", regex=False)

fig = go.Figure(go.Scattergeo(
    lat=df["Latitude (capital)"],
    lon=df["Longitude (capital)"],
    text=hover_text,
    customdata=hover_text,
    # Show only the label; <extra></extra> removes the secondary hover box.
    hovertemplate="%{customdata}<extra></extra>",
    marker=dict(
        # An exponent below 1 compresses the spread of counts so the largest
        # values do not produce oversized markers.
        # NOTE(review): 0.36 looks hand-tuned -- confirm before changing.
        size=df[target_label] ** 0.36,
        color=df[target_label],
        colorscale="plasma",
        colorbar_title=target_label,
    ),
))

# Hide the default geo layers, then switch back on only land and country
# borders on a natural-earth projection.
fig.update_geos(
    visible=False,
    resolution=50,
    showcountries=True,
    projection_type="natural earth",
    showland=True,
    landcolor="LightBlue",
)

fig.show()
In [6]:
# Scatter plot: normalized distance from Tallinn (x) against the number of
# Estonian citizens (y, log scale), one point per row, coloured by continent.
fig = px.scatter(
    data_frame=df,
    x="Distance normalized",
    y=target_label,
    color="Continent",
    log_y=True,
    # Hover box: the name as the title, and only the citizen count
    # (formatted without decimals) in the body.
    hover_name="Name",
    hover_data={
        "Distance normalized": False,
        "Continent": False,
        target_label: ":.0f",
    },
    # Axis / legend captions.
    labels={
        target_label: "# of Estonian citizens",
        "Distance normalized": "Distance between country capital and Tallinn (normalized)",
    },
)
fig.show()
In [7]:
# Min-max scale GDP (PPP) per capita: the lowest available value maps to 0
# and the highest to 1. Rows without a GDP figure stay NaN.
GDP = df["GDP PPP per capita"]
GDP_norm = GDP.sub(GDP.min()).div(GDP.max() - GDP.min())

df["GDP PPP per capita normalized"] = GDP_norm

# Display the table ordered from highest to lowest normalized GDP;
# rows with a missing value are listed last.
df.sort_values("GDP PPP per capita normalized", ascending=False)
Out[7]:
Nimi Name iso_alpha # of Estonian citizens Continent Sub-region Capital Latitude (capital) Longitude (capital) Distance between Capital and Tallinn (km) Population (2020) GDP PPP per capita Formerly part of the USSR? Sovereignty Name and number of citizens Distance normalized GDP PPP per capita normalized
19 Luksemburg Luxembourg LUX 280 Europe Western Europe Luxembourg 49.610600 6.132800 1613.6 625976 117500.20720 No Sovereign Luxembourg\n280 0.089800 1.000000
20 Singapur Singapore SGP 33 Asia South-eastern Asia Singapore 1.300000 103.800000 9266.5 5850343 98520.02954 No Sovereign Singapore\n33 0.538738 0.836736
21 Iirimaa Ireland IRL 2469 Europe Northern Europe Dublin 53.349700 -6.260300 2002.8 4937796 93180.94540 No Sovereign Ireland\n2469 0.112632 0.790810
22 Katar Qatar QAT 8 Asia Western Asia Doha 25.300000 51.533300 4324.3 2881060 89968.77102 No Sovereign Qatar\n8 0.248816 0.763180
23 Bermuda (Gbr) Bermuda BMU 3 Americas Northern America Hamilton 32.294200 -64.783900 6936.5 62273 80829.61681 No Not sovereign Bermuda\n3 0.402054 0.684566
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
14 Ahvenamaa Åland Islands ALA 1 Europe Northern Europe Mariehamn 60.098600 19.944400 279.1 30129 NaN No Not sovereign Åland Islands\n1 0.011515 NaN
15 Antarktis Antarctica ATA 1 Antarctica Antarctica NaN -75.250973 -0.071389 15251.9 4877 NaN No Not sovereign Antarctica\n1 0.889856 NaN
16 Réunion (Fra) Réunion REU 1 Africa Eastern Africa Saint-Denis -20.878900 55.448100 9359.0 895308 NaN No Not sovereign Réunion\n1 0.544164 NaN
17 Svalbard ja Jan Mayen (Nor) Svalbard and Jan Mayen SJM 1 Europe Northern Europe Longyearbyen 78.216700 15.633300 2114.0 2939 NaN No Not sovereign Svalbard and Jan Mayen\n1 0.119155 NaN
18 Wallis ja Futuna (Fra) Wallis and Futuna WLF 1 Oceania Polynesia Mata-Utu -13.282500 -176.173600 14600.7 11246 NaN No Not sovereign Wallis and Futuna\n1 0.851655 NaN

135 rows × 17 columns

In [8]:
# Scatter plot: GDP (PPP) per capita (x) against the number of Estonian
# citizens (y, log scale), one point per row, coloured by continent.
fig = px.scatter(
    data_frame=df,
    x="GDP PPP per capita",
    y=target_label,
    color="Continent",
    log_y=True,
    # Hover box: the name as the title, and only the GDP value
    # (formatted without decimals) in the body.
    hover_name="Name",
    hover_data={
        "GDP PPP per capita": ":.0f",
        "Continent": False,
        target_label: False,
    },
    # Axis / legend captions.
    labels={
        target_label: "# of Estonian citizens",
        "GDP PPP per capita": "GDP PPP per capita (INT USD, 2020)",
    },
)
fig.show()